/*- * See the file LICENSE for redistribution information. * * Copyright (c) 2005-2006 * Sleepycat Software. All rights reserved. * * $Id: DbCacheSize.java,v 1.1 2006/05/06 09:00:39 ckaestne Exp $ */ package com.sleepycat.je.util; import java.io.File; import java.io.PrintStream; import java.math.BigInteger; import java.text.NumberFormat; import java.util.Random; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; import com.sleepycat.je.DatabaseEntry; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.Environment; import com.sleepycat.je.EnvironmentConfig; import com.sleepycat.je.EnvironmentStats; import com.sleepycat.je.OperationStatus; import com.sleepycat.je.dbi.MemoryBudget; import com.sleepycat.je.utilint.CmdUtil; /** * Estimating JE in-memory sizes as a function of key and data size is not * straightforward for two reasons. There is some fixed overhead for each btree * internal node, so tree fanout and degree of node sparseness impacts memory * consumption. In addition, JE compresses some of the internal nodes where * possible, but compression depends on on-disk layouts. * * DbCacheSize is an aid for estimating cache sizes. To get an estimate of the * in-memory footprint for a given database, specify the number of records and * record characteristics and DbCacheSize will return a minimum and maximum * estimate of the cache size required for holding the database in memory. * If the user specifies the record's data size, the utility will return both * values for holding just the internal nodes of the btree, and for holding the * entire database in cache. * * Note that "cache size" is a percentage more than "btree size", to cover * general environment resources like log buffers. Each invocation of the * utility returns an estimate for a single database in an environment. For an * environment with multiple databases, run the utility for each database, add * up the btree sizes, and then add 10 percent. 
 *
 * Note that the utility does not yet cover duplicate records and the API is
 * subject to change from release to release.
 *
 * The only required parameters are the number of records and key size.
 * Data size, non-tree cache overhead, btree fanout, and other parameters
 * can also be provided. For example:
 *
 * $ java DbCacheSize -records 554719 -key 16 -data 100
 * Inputs: records=554719 keySize=16 dataSize=100 nodeMax=128 density=80%
 * overhead=10%
 *
 * Cache Size Btree Size Description
 * -------------- -------------- -----------
 * 30,547,440 27,492,696 Minimum, internal nodes only
 * 41,460,720 37,314,648 Maximum, internal nodes only
 * 114,371,644 102,934,480 Minimum, internal nodes and leaf nodes
 * 125,284,924 112,756,432 Maximum, internal nodes and leaf nodes
 *
 * Btree levels: 3
 *
 * This says that the minimum cache size to hold only the internal nodes of the
 * btree in cache is approximately 30MB. The maximum size to hold the entire
 * database in cache, both internal nodes and data records, is 125MB.
*/ public class DbCacheSize { private static final NumberFormat INT_FORMAT = NumberFormat.getIntegerInstance(); private static final String HEADER = " Cache Size Btree Size Description\n" + "-------------- -------------- -----------"; // 12345678901234 12345678901234 // 12 private static final int COLUMN_WIDTH = 14; private static final int COLUMN_SEPARATOR = 2; public static void main(String[] args) { try { long records = 0; int keySize = 0; int dataSize = 0; int nodeMax = 128; int density = 80; long overhead = 0; File measureDir = null; boolean measureRandom = false; for (int i = 0; i < args.length; i += 1) { String name = args[i]; String val = null; if (i < args.length - 1 && !args[i + 1].startsWith("-")) { i += 1; val = args[i]; } if (name.equals("-records")) { if (val == null) { usage("No value after -records"); } try { records = Long.parseLong(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } if (records <= 0) { usage(val + " is not a positive integer"); } } else if (name.equals("-key")) { if (val == null) { usage("No value after -key"); } try { keySize = Integer.parseInt(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } if (keySize <= 0) { usage(val + " is not a positive integer"); } } else if (name.equals("-data")) { if (val == null) { usage("No value after -data"); } try { dataSize = Integer.parseInt(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } if (dataSize <= 0) { usage(val + " is not a positive integer"); } } else if (name.equals("-nodemax")) { if (val == null) { usage("No value after -nodemax"); } try { nodeMax = Integer.parseInt(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } if (nodeMax <= 0) { usage(val + " is not a positive integer"); } } else if (name.equals("-density")) { if (val == null) { usage("No value after -density"); } try { density = Integer.parseInt(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } 
if (density < 1 || density > 100) { usage(val + " is not betwen 1 and 100"); } } else if (name.equals("-overhead")) { if (val == null) { usage("No value after -overhead"); } try { overhead = Long.parseLong(val); } catch (NumberFormatException e) { usage(val + " is not a number"); } if (overhead < 0) { usage(val + " is not a non-negative integer"); } } else if (name.equals("-measure")) { if (val == null) { usage("No value after -measure"); } measureDir = new File(val); } else if (name.equals("-measurerandom")) { measureRandom = true; } else { usage("Unknown arg: " + name); } } if (records == 0) { usage("-records not specified"); } if (keySize == 0) { usage("-key not specified"); } printCacheSizes(System.out, records, keySize, dataSize, nodeMax, density, overhead); if (measureDir != null) { measure(System.out, measureDir, records, keySize, dataSize, nodeMax, measureRandom); } } catch (Throwable e) { e.printStackTrace(System.out); } } private static void usage(String msg) { if (msg != null) { System.out.println(msg); } System.out.println ("usage:" + "\njava " + CmdUtil.getJavaCommand(DbCacheSize.class) + "\n -records <count>" + "\n # Total records (key/data pairs); required" + "\n -key <bytes> " + "\n # Average key bytes per record; required" + "\n [-data <bytes>]" + "\n # Average data bytes per record; if omitted no leaf" + "\n # node sizes are included in the output" + "\n [-nodemax <entries>]" + "\n # Number of entries per Btree node; default: 128" + "\n [-density <percentage>]" + "\n # Percentage of node entries occupied; default: 80" + "\n [-overhead <bytes>]" + "\n # Overhead of non-Btree objects (log buffers, locks," + "\n # etc); default: 10% of total cache size" + "\n [-measure <environmentHomeDirectory>]" + "\n # An empty directory used to write a database to find" + "\n # the actual cache size; default: do not measure" + "\n [-measurerandom" + "\n # With -measure insert randomly generated keys;" + "\n # default: insert sequential keys"); System.exit(2); } 
/**
 * Computes the minimum/maximum btree byte estimates from the inputs and
 * prints them together with the derived cache sizes.
 *
 * @param overhead non-btree overhead in bytes; 0 means "use the 10%
 * default" (see line()).
 */
private static void printCacheSizes(PrintStream out,
                                    long records,
                                    int keySize,
                                    int dataSize,
                                    int nodeMax,
                                    int density,
                                    long overhead) {

    /* A user-specified -overhead is an absolute byte count, not a
       percentage; only the implicit default is 10%. */
    String overheadDesc = (overhead > 0) ? (overhead + " bytes") : "10%";
    out.println("Inputs:" +
                " records=" + records +
                " keySize=" + keySize +
                " dataSize=" + dataSize +
                " nodeMax=" + nodeMax +
                " density=" + density + '%' +
                " overhead=" + overheadDesc);

    /* Average populated entries per bottom internal node (BIN). */
    int nodeAvg = (nodeMax * density) / 100;

    /* Scale the record count up by the sparseness to get BIN slots, then
       round up to whole BINs. */
    long nBinEntries = (records * nodeMax) / nodeAvg;
    long nBinNodes = (nBinEntries + nodeMax - 1) / nodeMax;

    /* Sum the nodes at each btree level above the leaves. */
    long nInNodes = 0;
    int nLevels = 1;
    for (long n = nBinNodes; n > 0; n /= nodeMax) {
        nInNodes += n;
        nLevels += 1;
    }

    /* Min assumes compressed LSN arrays, max assumes uncompressed. */
    long minInSize = nInNodes * calcInSize(nodeMax, nodeAvg, keySize, true);
    long maxInSize = nInNodes * calcInSize(nodeMax, nodeAvg, keySize, false);
    long lnSize = 0;
    if (dataSize > 0) {
        lnSize = records * calcLnSize(dataSize);
    }

    out.println();
    out.println(HEADER);
    out.println(line(minInSize, overhead, "Minimum, internal nodes only"));
    out.println(line(maxInSize, overhead, "Maximum, internal nodes only"));
    if (dataSize > 0) {
        out.println(line(minInSize + lnSize, overhead,
                         "Minimum, internal nodes and leaf nodes"));
        out.println(line(maxInSize + lnSize, overhead,
                         "Maximum, internal nodes and leaf nodes"));
    } else {
        out.println("\nTo get leaf node sizing specify -data");
    }
    out.println("\nBtree levels: " + nLevels);
}

/**
 * Returns the estimated in-memory bytes for one internal node (IN).
 *
 * @param lsnCompression if true, assume the compact LSN representation
 * (2 bytes per entry); if false, assume one long per entry.
 */
private static int calcInSize(int nodeMax,
                              int nodeAvg,
                              int keySize,
                              boolean lsnCompression) {

    /* Fixed overhead */
    int size = MemoryBudget.IN_FIXED_OVERHEAD;

    /* Byte state array plus keys and nodes arrays */
    size += MemoryBudget.byteArraySize(nodeMax) +
            (nodeMax * (2 * MemoryBudget.ARRAY_ITEM_OVERHEAD));

    /* LSN array */
    if (lsnCompression) {
        size += MemoryBudget.byteArraySize(nodeMax * 2);
    } else {
        size += MemoryBudget.BYTE_ARRAY_OVERHEAD +
                (nodeMax * MemoryBudget.LONG_OVERHEAD);
    }

    /* Keys for populated entries plus the identifier key */
    size += (nodeAvg + 1) * MemoryBudget.byteArraySize(keySize);

    return size;
}

/** Returns the estimated in-memory bytes for one leaf node (LN). */
private static int calcLnSize(int dataSize) {
    return MemoryBudget.LN_OVERHEAD + MemoryBudget.byteArraySize(dataSize);
}

/**
 * Formats one table row.  When overhead is 0, the cache size is computed
 * so the btree occupies 90% of it (i.e. the default 10% overhead);
 * otherwise the overhead bytes are simply added.
 */
private static String line(long btreeSize, long overhead, String comment) {

    long cacheSize;
    if (overhead == 0) {
        cacheSize = (100 * btreeSize) / 90;
    } else {
        cacheSize = btreeSize + overhead;
    }

    StringBuffer buf = new StringBuffer(100);
    column(buf, INT_FORMAT.format(cacheSize));
    column(buf, INT_FORMAT.format(btreeSize));
    column(buf, comment);
    return buf.toString();
}

/** Appends str right-justified in COLUMN_WIDTH, then a column separator. */
private static void column(StringBuffer buf, String str) {

    int start = buf.length();
    while (buf.length() - start + str.length() < COLUMN_WIDTH) {
        buf.append(' ');
    }
    buf.append(str);
    for (int i = 0; i < COLUMN_SEPARATOR; i += 1) {
        buf.append(' ');
    }
}

/**
 * Writes a real database into the (empty) directory and reports actual
 * cache statistics: once after inserting all records, and once after
 * reopening and preloading (internal nodes only).
 */
private static void measure(PrintStream out,
                            File dir,
                            long records,
                            int keySize,
                            int dataSize,
                            int nodeMax,
                            boolean randomKeys)
    throws DatabaseException {

    String[] fileNames = dir.list();
    if (fileNames != null && fileNames.length > 0) {
        usage("Directory is not empty: " + dir);
    }

    Environment env = openEnvironment(dir, true);
    Database db = openDatabase(env, nodeMax, true);
    try {
        out.println("\nMeasuring with cache size: " +
                    INT_FORMAT.format(env.getConfig().getCacheSize()));
        insertRecords(out, env, db, records, keySize, dataSize, randomKeys);
        printStats(out, env,
                   "Stats for internal and leaf nodes (after insert)");

        /* Reopen and preload so only internal nodes are cached. */
        db.close();
        env.close();
        env = openEnvironment(dir, false);
        db = openDatabase(env, nodeMax, false);
        out.println("\nPreloading with cache size: " +
                    INT_FORMAT.format(env.getConfig().getCacheSize()));
        preloadRecords(out, db);
        printStats(out, env,
                   "Stats for internal nodes only (after preload)");
    } finally {
        try {
            db.close();
            env.close();
        } catch (Exception e) {
            /* Close may fail here when the handles were already closed
               before the reopen above; report but do not rethrow. */
            out.println("During close: " + e);
        }
    }
}

/**
 * Opens an environment with 90% of the heap as cache so measurements use
 * as much memory as possible.
 */
private static Environment openEnvironment(File dir, boolean allowCreate)
    throws DatabaseException {

    EnvironmentConfig envConfig = new EnvironmentConfig();
    envConfig.setAllowCreate(allowCreate);
    envConfig.setCachePercent(90);
    return new Environment(dir, envConfig);
}

/** Opens the measurement database with the requested btree fanout. */
private static Database openDatabase(Environment env, int nodeMax,
                                     boolean allowCreate)
    throws DatabaseException {

    DatabaseConfig dbConfig = new DatabaseConfig();
    dbConfig.setAllowCreate(allowCreate);
    dbConfig.setNodeMaxEntries(nodeMax);
    return env.openDatabase(null, "foo", dbConfig);
}

/**
 * Inserts the requested number of records with either sequential
 * (big-endian counter, zero-padded to keySize) or random keys.  Aborts
 * with a message if a key does not fit, an insert fails, or the cache
 * starts evicting (nodes scanned > 0).
 */
private static void insertRecords(PrintStream out,
                                  Environment env,
                                  Database db,
                                  long records,
                                  int keySize,
                                  int dataSize,
                                  boolean randomKeys)
    throws DatabaseException {

    DatabaseEntry key = new DatabaseEntry();
    DatabaseEntry data = new DatabaseEntry(new byte[dataSize]);
    BigInteger bigInt = BigInteger.ZERO;
    Random rnd = new Random(123);

    /* Use a long index: records is a long and may exceed Integer.MAX_VALUE;
       an int index would overflow and loop forever. */
    for (long i = 0; i < records; i += 1) {
        if (randomKeys) {
            byte[] a = new byte[keySize];
            rnd.nextBytes(a);
            key.setData(a);
        } else {
            bigInt = bigInt.add(BigInteger.ONE);
            byte[] a = bigInt.toByteArray();
            if (a.length < keySize) {
                /* Left-pad with zeros to the fixed key size. */
                byte[] a2 = new byte[keySize];
                System.arraycopy(a, 0, a2, a2.length - a.length, a.length);
                a = a2;
            } else if (a.length > keySize) {
                out.println("*** Key doesn't fit value=" + bigInt +
                            " byte length=" + a.length);
                return;
            }
            key.setData(a);
        }
        OperationStatus status = db.putNoOverwrite(null, key, data);
        if (status == OperationStatus.KEYEXIST && randomKeys) {
            /* Duplicate random key: retry with a fresh key. */
            i -= 1;
            out.println("Random key already exists -- retrying");
            continue;
        }
        if (status != OperationStatus.SUCCESS) {
            out.println("*** " + status);
            return;
        }
        if (i % 10000 == 0) {
            EnvironmentStats stats = env.getStats(null);
            if (stats.getNNodesScanned() > 0) {
                out.println("*** Ran out of cache memory at record " + i +
                            " -- try increasing the Java heap size ***");
                return;
            }
            out.print(".");
            out.flush();
        }
    }
}

/**
 * Preloads the database while a background thread prints a progress dot
 * every five seconds.
 */
private static void preloadRecords(final PrintStream out,
                                   final Database db)
    throws DatabaseException {

    Thread thread = new Thread() {
        public void run() {
            while (true) {
                try {
                    out.print(".");
                    out.flush();
                    Thread.sleep(5 * 1000);
                } catch (InterruptedException e) {
                    break;
                }
            }
        }
    };
    thread.start();
    db.preload(0);
    thread.interrupt();
    try {
        thread.join();
    } catch (InterruptedException e) {
        /* Restore the interrupt status so callers can observe it. */
        Thread.currentThread().interrupt();
        e.printStackTrace(out);
    }
}

/**
 * Prints total cache bytes and btree (data) bytes, plus a warning when
 * eviction has occurred (nodes scanned > 0).
 */
private static void printStats(PrintStream out, Environment env, String msg)
    throws DatabaseException {

    out.println();
    out.println(msg + ':');
    EnvironmentStats stats = env.getStats(null);
    out.println("CacheSize=" +
                INT_FORMAT.format(stats.getCacheTotalBytes()) +
                " BtreeSize=" +
                INT_FORMAT.format(stats.getCacheDataBytes()));
    if (stats.getNNodesScanned() > 0) {
        out.println("*** All records did not fit in the cache ***");
    }
}
}